!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Collection of simple mathematical functions and subroutines
!> \par History
!>      FUNCTION angle updated and FUNCTION dihedral angle added; cleaned
!>      (13.03.2004,MK)
!> \author MK (15.11.1998)
! **************************************************************************************************
MODULE mathlib

   USE kinds,                           ONLY: default_string_length,&
                                              dp
   USE mathconstants,                   ONLY: euler,&
                                              fac,&
                                              oorootpi,&
                                              z_one,&
                                              z_zero
#include "../base/base_uses.f90"

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mathlib'
   REAL(KIND=dp), PARAMETER             :: eps_geo = 1.0E-6_dp

   ! Public subroutines

   PUBLIC :: build_rotmat, &
             jacobi, &
             diamat_all, &
             invmat, &
             invmat_symm, &
             invert_matrix, &
             get_pseudo_inverse_svd, &
             get_pseudo_inverse_diag, &
             symmetrize_matrix, &
             unit_matrix, diag, &
             erfc_cutoff, &
             diag_antisym, &
             diag_complex, &
             gemm_square

   ! Public functions

   PUBLIC :: angle, &
             binomial, &
             binomial_gen, &
             multinomial, &
             det_3x3, &
             dihedral_angle, &
             gcd, &
             inv_3x3, &
             lcm, &
             vector_product, &
             pswitch, &
             rotate_vector, &
             reflect_vector, &
             expint, abnormal_value, &
             get_diag, &
             set_diag

   INTERFACE det_3x3
      MODULE PROCEDURE det_3x3_1, det_3x3_2
   END INTERFACE

   INTERFACE invert_matrix
      MODULE PROCEDURE invert_matrix_d, invert_matrix_z
   END INTERFACE

   INTERFACE set_diag
      MODULE PROCEDURE set_diag_scalar_d, set_diag_scalar_z
   END INTERFACE

   INTERFACE swap
      MODULE PROCEDURE swap_scalar, swap_vector
   END INTERFACE

   INTERFACE unit_matrix
      MODULE PROCEDURE unit_matrix_d, unit_matrix_z
   END INTERFACE

   INTERFACE gemm_square
      MODULE PROCEDURE zgemm_square_2, zgemm_square_3, dgemm_square_2, dgemm_square_3
   END INTERFACE

CONTAINS

! **************************************************************************************************
!> \brief Polynomial (5th degree) switching function
!>        f(a) = 1 .... f(b) = 0 with f'(a) = f"(a) = f'(b) = f"(b) = 0
!> \param x ...
!> \param a ...
!> \param b ...
!> \param order ...
!> \return =0 : f(x)
!> \return =1 : f'(x)
!> \return =2 : f"(x)
! **************************************************************************************************
   FUNCTION pswitch(x, a, b, order) RESULT(fx)
      REAL(KIND=dp)                                      :: x, a, b
      INTEGER                                            :: order
      REAL(KIND=dp)                                      :: fx

      REAL(KIND=dp)                                      :: u, u2, u3

      CPASSERT(b > a)
      IF (x < a .OR. x > b) THEN
         ! outside switching intervall
         IF (order > 0) THEN
            ! derivatives are 0
            fx = 0.0_dp
         ELSE
            IF (x < a) THEN
               ! x < a => f(x) = 1
               fx = 1.0_dp
            ELSE
               ! x > b => f(x) = 0
               fx = 0.0_dp
            END IF
         END IF
      ELSE
         ! renormalized coordinate
         u = (x - a)/(b - a)
         SELECT CASE (order)
         CASE (0)
            u2 = u*u
            u3 = u2*u
            fx = 1._dp - 10._dp*u3 + 15._dp*u2*u2 - 6._dp*u2*u3
         CASE (1)
            u2 = u*u
            fx = -30._dp*u2 + 60._dp*u*u2 - 30._dp*u2*u2
            fx = fx/(b - a)
         CASE (2)
            u2 = u*u
            fx = -60._dp*u + 180._dp*u2 - 120._dp*u*u2
            fx = fx/(b - a)**2
         CASE DEFAULT
            CPABORT('order not defined')
         END SELECT
      END IF

   END FUNCTION pswitch

! **************************************************************************************************
!> \brief determines if a value is not normal (e.g. for Inf and Nan)
!>        based on IO to work also under optimization.
!> \param a input value
!> \return TRUE for NaN and Inf
! **************************************************************************************************
   LOGICAL FUNCTION abnormal_value(a)
      REAL(KIND=dp)                                      :: a

      CHARACTER(LEN=32)                                  :: buffer

      abnormal_value = .FALSE.
      ! the function should work when compiled with -ffast-math and similar
      ! unfortunately, that option asserts that all numbers are normals,
      ! which the compiler uses to optimize the function to .FALSE. if based on the IEEE module
      ! therefore, pass this to the Fortran runtime/printf, if things are NaN or Inf, error out.
      WRITE (buffer, *) a
      IF (INDEX(buffer, "N") /= 0 .OR. INDEX(buffer, "n") /= 0) abnormal_value = .TRUE.

   END FUNCTION abnormal_value

! **************************************************************************************************
!> \brief  Calculation of the angle between the vectors a and b.
!>         The angle is returned in radians.
!> \param a ...
!> \param b ...
!> \return ...
!> \date    14.10.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION angle(a, b) RESULT(angle_ab)
      REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: a, b
      REAL(KIND=dp)                                      :: angle_ab

      REAL(KIND=dp)                                      :: length_of_a, length_of_b
      REAL(KIND=dp), DIMENSION(SIZE(a, 1))               :: a_norm, b_norm

      length_of_a = SQRT(DOT_PRODUCT(a, a))
      length_of_b = SQRT(DOT_PRODUCT(b, b))

      IF ((length_of_a > eps_geo) .AND. (length_of_b > eps_geo)) THEN
         a_norm(:) = a(:)/length_of_a
         b_norm(:) = b(:)/length_of_b
         angle_ab = ACOS(MIN(MAX(DOT_PRODUCT(a_norm, b_norm), -1.0_dp), 1.0_dp))
      ELSE
         angle_ab = 0.0_dp
      END IF

   END FUNCTION angle

! **************************************************************************************************
!> \brief   The binomial coefficient n over k for 0 <= k <= n is calculated,
!>            otherwise zero is returned.
!> \param n ...
!> \param k ...
!> \return ...
!> \date    08.03.1999
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   ELEMENTAL FUNCTION binomial(n, k) RESULT(n_over_k)
      INTEGER, INTENT(IN)                                :: n, k
      REAL(KIND=dp)                                      :: n_over_k

      IF ((k >= 0) .AND. (k <= n)) THEN
         n_over_k = fac(n)/(fac(n - k)*fac(k))
      ELSE
         n_over_k = 0.0_dp
      END IF

   END FUNCTION binomial

! **************************************************************************************************
!> \brief   The generalized binomial coefficient z over k for 0 <= k <= n is calculated.
!>            (z)   z*(z-1)*...*(z-k+2)*(z-k+1)
!>            ( ) = ---------------------------
!>            (k)                 k!
!> \param z ...
!> \param k ...
!> \return ...
!> \date    11.11.2019
!> \author  FS
!> \version 1.0
! **************************************************************************************************
   ELEMENTAL FUNCTION binomial_gen(z, k) RESULT(z_over_k)
      REAL(KIND=dp), INTENT(IN)                          :: z
      INTEGER, INTENT(IN)                                :: k
      REAL(KIND=dp)                                      :: z_over_k

      INTEGER                                            :: i

      IF (k >= 0) THEN
         z_over_k = 1.0_dp
         DO i = 1, k
            z_over_k = z_over_k*(z - i + 1)/REAL(i, dp)
         END DO
      ELSE
         z_over_k = 0.0_dp
      END IF

   END FUNCTION binomial_gen

! **************************************************************************************************
!> \brief Calculates the multinomial coefficients
!> \param n ...
!> \param k ...
!> \return ...
!> \author Ole Schuett
! **************************************************************************************************
   PURE FUNCTION multinomial(n, k) RESULT(res)
      INTEGER, INTENT(IN)                                :: n
      INTEGER, DIMENSION(:), INTENT(IN)                  :: k
      REAL(KIND=dp)                                      :: res

      INTEGER                                            :: i
      REAL(KIND=dp)                                      :: denom

      IF (ALL(k >= 0) .AND. SUM(k) == n) THEN
         denom = 1.0_dp
         DO i = 1, SIZE(k)
            denom = denom*fac(k(i))
         END DO
         res = fac(n)/denom
      ELSE
         res = 0.0_dp
      END IF

   END FUNCTION multinomial

! **************************************************************************************************
!> \brief   The rotation matrix rotmat which rotates a vector about a
!>          rotation axis defined by the vector a is build up.
!>          The rotation angle is phi (radians).
!> \param phi ...
!> \param a ...
!> \param rotmat ...
!> \date    16.10.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE SUBROUTINE build_rotmat(phi, a, rotmat)
      REAL(KIND=dp), INTENT(IN)                          :: phi
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: a
      REAL(KIND=dp), DIMENSION(3, 3), INTENT(OUT)        :: rotmat

      REAL(KIND=dp)                                      :: cosp, cost, length_of_a, sinp
      REAL(KIND=dp), DIMENSION(3)                        :: d

      length_of_a = SQRT(a(1)*a(1) + a(2)*a(2) + a(3)*a(3))
      ! Check the length of the vector a
      IF (length_of_a > eps_geo) THEN

         d(:) = a(:)/length_of_a

         cosp = COS(phi)
         sinp = SIN(phi)
         cost = 1.0_dp - cosp

         rotmat(1, 1) = d(1)*d(1)*cost + cosp
         rotmat(1, 2) = d(1)*d(2)*cost - d(3)*sinp
         rotmat(1, 3) = d(1)*d(3)*cost + d(2)*sinp
         rotmat(2, 1) = d(2)*d(1)*cost + d(3)*sinp
         rotmat(2, 2) = d(2)*d(2)*cost + cosp
         rotmat(2, 3) = d(2)*d(3)*cost - d(1)*sinp
         rotmat(3, 1) = d(3)*d(1)*cost - d(2)*sinp
         rotmat(3, 2) = d(3)*d(2)*cost + d(1)*sinp
         rotmat(3, 3) = d(3)*d(3)*cost + cosp
      ELSE
         CALL unit_matrix(rotmat)
      END IF

   END SUBROUTINE build_rotmat

! **************************************************************************************************
!> \brief   Returns the determinante of the 3x3 matrix a.
!> \param a ...
!> \return ...
!> \date    13.03.2004
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION det_3x3_1(a) RESULT(det_a)
      REAL(KIND=dp), DIMENSION(3, 3), INTENT(IN)         :: a
      REAL(KIND=dp)                                      :: det_a

      det_a = a(1, 1)*(a(2, 2)*a(3, 3) - a(2, 3)*a(3, 2)) + &
              a(1, 2)*(a(2, 3)*a(3, 1) - a(2, 1)*a(3, 3)) + &
              a(1, 3)*(a(2, 1)*a(3, 2) - a(2, 2)*a(3, 1))

   END FUNCTION det_3x3_1

! **************************************************************************************************
!> \brief   Returns the determinante of the 3x3 matrix a given by its columns.
!> \param a1 ...
!> \param a2 ...
!> \param a3 ...
!> \return ...
!> \date    13.03.2004
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION det_3x3_2(a1, a2, a3) RESULT(det_a)
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: a1, a2, a3
      REAL(KIND=dp)                                      :: det_a

      det_a = a1(1)*(a2(2)*a3(3) - a3(2)*a2(3)) + &
              a2(1)*(a3(2)*a1(3) - a1(2)*a3(3)) + &
              a3(1)*(a1(2)*a2(3) - a2(2)*a1(3))

   END FUNCTION det_3x3_2

! **************************************************************************************************
!> \brief Diagonalize the symmetric n by n matrix a using the LAPACK
!>        library. Only the upper triangle of matrix a is used.
!>        Externals (LAPACK 3.0)
!> \param a ...
!> \param eigval ...
!> \param dac ...
!> \date    29.03.1999
!> \par Variables
!>      - a       : Symmetric matrix to be diagonalized (input; upper triangle) ->
!>      -           eigenvectors of the matrix a (output).
!>      - dac     : If true, then the divide-and-conquer algorithm is applied.
!>      - eigval  : Eigenvalues of the matrix a (output).
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE diamat_all(a, eigval, dac)
      REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: a
      REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigval
      LOGICAL, INTENT(IN), OPTIONAL                      :: dac

      CHARACTER(len=*), PARAMETER                        :: routineN = 'diamat_all'

      INTEGER                                            :: handle, info, liwork, lwork, n, nb
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iwork
      INTEGER, EXTERNAL                                  :: ilaenv
      LOGICAL                                            :: divide_and_conquer
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: work

      EXTERNAL dsyev, dsyevd

      CALL timeset(routineN, handle)

      ! Get the size of the matrix a
      n = SIZE(a, 1)

      ! Check the size of matrix a
      IF (SIZE(a, 2) /= n) THEN
         CPABORT("Check the size of matrix a (parameter #1)")
      END IF

      ! Check the size of vector eigval
      IF (SIZE(eigval) /= n) THEN
         CPABORT("The dimension of vector eigval is too small")
      END IF

      ! Check, if the divide-and-conquer algorithm is requested

      IF (PRESENT(dac)) THEN
         divide_and_conquer = dac
      ELSE
         divide_and_conquer = .FALSE.
      END IF

      ! Get the optimal work storage size

      IF (divide_and_conquer) THEN
         lwork = 2*n**2 + 6*n + 1
         liwork = 5*n + 3
      ELSE
         nb = ilaenv(1, "DSYTRD", "U", n, -1, -1, -1)
         lwork = (nb + 2)*n
      END IF

      ! Allocate work storage

      ALLOCATE (work(lwork))
      IF (divide_and_conquer) THEN
         ALLOCATE (iwork(liwork))
      END IF

      ! Diagonalize the matrix a

      info = 0
      IF (divide_and_conquer) THEN
         CALL dsyevd("V", "U", n, a, n, eigval, work, lwork, iwork, liwork, info)
      ELSE
         CALL dsyev("V", "U", n, a, n, eigval, work, lwork, info)
      END IF

      IF (info /= 0) THEN
         IF (divide_and_conquer) THEN
            CPABORT("The matrix diagonalization with dsyevd failed")
         ELSE
            CPABORT("The matrix diagonalization with dsyev failed")
         END IF
      END IF

      ! Release work storage
      DEALLOCATE (work)

      IF (divide_and_conquer) THEN
         DEALLOCATE (iwork)
      END IF

      CALL timestop(handle)

   END SUBROUTINE diamat_all

! **************************************************************************************************
!> \brief   Returns the dihedral angle, i.e. the angle between the planes
!>          defined by the vectors (-ab,bc) and (cd,-bc).
!>          The dihedral angle is returned in radians.
!> \param ab ...
!> \param bc ...
!> \param cd ...
!> \return ...
!> \date    13.03.2004
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION dihedral_angle(ab, bc, cd) RESULT(dihedral_angle_abcd)
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: ab, bc, cd
      REAL(KIND=dp)                                      :: dihedral_angle_abcd

      REAL(KIND=dp)                                      :: det_abcd
      REAL(KIND=dp), DIMENSION(3)                        :: abc, bcd

      abc = vector_product(bc, -ab)
      bcd = vector_product(cd, -bc)
      ! Calculate the normal vectors of the planes
      ! defined by the points a,b,c and b,c,d

      det_abcd = det_3x3(abc, bcd, -bc)
      dihedral_angle_abcd = SIGN(1.0_dp, det_abcd)*angle(abc, bcd)

   END FUNCTION dihedral_angle

! **************************************************************************************************
!> \brief   Return the diagonal elements of matrix a as a vector.
!> \param a ...
!> \return ...
!> \date    20.11.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION get_diag(a) RESULT(a_diag)
      REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: a
      REAL(KIND=dp), &
         DIMENSION(MIN(SIZE(a, 1), SIZE(a, 2)))          :: a_diag

      INTEGER                                            :: i, n

      n = MIN(SIZE(a, 1), SIZE(a, 2))

      DO i = 1, n
         a_diag(i) = a(i, i)
      END DO

   END FUNCTION get_diag

! **************************************************************************************************
!> \brief   Returns the inverse of the 3 x 3 matrix a.
!> \param a ...
!> \return ...
!> \date    13.03.2004
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION inv_3x3(a) RESULT(a_inv)
      REAL(KIND=dp), DIMENSION(3, 3), INTENT(IN)         :: a
      REAL(KIND=dp), DIMENSION(3, 3)                     :: a_inv

      REAL(KIND=dp)                                      :: det_a

      det_a = 1.0_dp/det_3x3(a)

      a_inv(1, 1) = (a(2, 2)*a(3, 3) - a(3, 2)*a(2, 3))*det_a
      a_inv(2, 1) = (a(2, 3)*a(3, 1) - a(3, 3)*a(2, 1))*det_a
      a_inv(3, 1) = (a(2, 1)*a(3, 2) - a(3, 1)*a(2, 2))*det_a

      a_inv(1, 2) = (a(1, 3)*a(3, 2) - a(3, 3)*a(1, 2))*det_a
      a_inv(2, 2) = (a(1, 1)*a(3, 3) - a(3, 1)*a(1, 3))*det_a
      a_inv(3, 2) = (a(1, 2)*a(3, 1) - a(3, 2)*a(1, 1))*det_a

      a_inv(1, 3) = (a(1, 2)*a(2, 3) - a(2, 2)*a(1, 3))*det_a
      a_inv(2, 3) = (a(1, 3)*a(2, 1) - a(2, 3)*a(1, 1))*det_a
      a_inv(3, 3) = (a(1, 1)*a(2, 2) - a(2, 1)*a(1, 2))*det_a

   END FUNCTION inv_3x3

! **************************************************************************************************
!> \brief returns inverse of matrix using the lapack routines DGETRF and DGETRI
!> \param a ...
!> \param info ...
! **************************************************************************************************
   SUBROUTINE invmat(a, info)
      REAL(KIND=dp), INTENT(INOUT)                       :: a(:, :)
      INTEGER, INTENT(OUT)                               :: info

      CHARACTER(LEN=*), PARAMETER                        :: routineN = 'invmat'

      INTEGER                                            :: handle, lwork, n
      INTEGER, ALLOCATABLE                               :: ipiv(:)
      REAL(KIND=dp), ALLOCATABLE                         :: work(:)

      CALL timeset(routineN, handle)

      n = SIZE(a, 1)
      lwork = 20*n
      ALLOCATE (ipiv(n))
      ALLOCATE (work(lwork))
      ipiv = 0
      work = 0._dp
      info = 0
      CALL dgetrf(n, n, a, n, ipiv, info)
      IF (info == 0) THEN
         CALL dgetri(n, a, n, ipiv, work, lwork, info)
      END IF
      DEALLOCATE (ipiv, work)

      CALL timestop(handle)

   END SUBROUTINE invmat

! **************************************************************************************************
!> \brief returns inverse of real symmetric, positive definite matrix
!> \param a matrix
!> \param potrf if cholesky decomposition of a was already done using dpotrf.
!>        If not given, cholesky decomposition of a will be done before inversion.
!> \param uplo indicating if the upper or lower triangle of a is stored.
!> \author Dorothea Golze [02.2015]
! **************************************************************************************************
   SUBROUTINE invmat_symm(a, potrf, uplo)
      REAL(KIND=dp), INTENT(INOUT)                       :: a(:, :)
      LOGICAL, INTENT(IN), OPTIONAL                      :: potrf
      CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: uplo

      CHARACTER(LEN=*), PARAMETER                        :: routineN = 'invmat_symm'

      CHARACTER(LEN=1)                                   :: myuplo
      INTEGER                                            :: handle, info, n
      LOGICAL                                            :: do_potrf

      CALL timeset(routineN, handle)

      do_potrf = .TRUE.
      IF (PRESENT(potrf)) do_potrf = potrf

      myuplo = 'U'
      IF (PRESENT(uplo)) myuplo = uplo

      n = SIZE(a, 1)
      info = 0

      ! do cholesky decomposition
      IF (do_potrf) THEN
         CALL dpotrf(myuplo, n, a, n, info)
         IF (info /= 0) CPABORT("DPOTRF failed")
      END IF

      ! do inversion using the cholesky decomposition
      CALL dpotri(myuplo, n, a, n, info)
      IF (info /= 0) CPABORT("Matrix inversion failed")

      ! complete the matrix
      IF ((myuplo == "U") .OR. (myuplo == "u")) THEN
         CALL symmetrize_matrix(a, "upper_to_lower")
      ELSE
         CALL symmetrize_matrix(a, "lower_to_upper")
      END IF

      CALL timestop(handle)

   END SUBROUTINE invmat_symm

! **************************************************************************************************
!> \brief  Compute the inverse of the n by n real matrix a using the LAPACK
!>         library
!> \param a ...
!> \param a_inverse ...
!> \param eval_error ...
!> \param option ...
!> \param improve ...
!> \date   23.03.1999
!> \par Variables
!>       - a        : Real matrix to be inverted (input).
!>       - a_inverse: Inverse of the matrix a (output).
!>       - a_lu     : LU factorization of matrix a.
!>       - a_norm   : Norm of matrix a.
!>       - error    : Estimated error of the inversion.
!>       - r_cond   : Reciprocal condition number of the matrix a.
!>       - trans    : "N" => invert a
!>       -            "T" => invert transpose(a)
!> \author MK
!> \version 1.0
!> \note NB add improve argument, used to disable call to dgerfs
! **************************************************************************************************
   SUBROUTINE invert_matrix_d(a, a_inverse, eval_error, option, improve)
      REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: a
      REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: a_inverse
      REAL(KIND=dp), INTENT(OUT)                         :: eval_error
      CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: option
      LOGICAL, INTENT(IN), OPTIONAL                      :: improve

      CHARACTER(LEN=1)                                   :: norm, trans
      CHARACTER(LEN=default_string_length)               :: message
      INTEGER                                            :: info, iter, n
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv, iwork
      LOGICAL                                            :: do_improve
      REAL(KIND=dp)                                      :: a_norm, old_eval_error, r_cond
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: berr, ferr, work
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: a_lu, b
      REAL(KIND=dp), EXTERNAL                            :: dlange

      EXTERNAL dgecon, dgerfs, dgetrf, dgetrs

      ! Check for optional parameter
      IF (PRESENT(option)) THEN
         trans = option
      ELSE
         trans = "N"
      END IF

      IF (PRESENT(improve)) THEN
         do_improve = improve
      ELSE
         do_improve = .TRUE.
      END IF

      ! Get the dimension of matrix a
      n = SIZE(a, 1)

      ! Check array dimensions
      IF (n == 0) THEN
         CPABORT("Matrix to be inverted of zero size")
      END IF

      IF (n /= SIZE(a, 2)) THEN
         CPABORT("Check the array bounds of parameter #1")
      END IF

      IF ((n /= SIZE(a_inverse, 1)) .OR. &
          (n /= SIZE(a_inverse, 2))) THEN
         CPABORT("Check the array bounds of parameter #2")
      END IF

      ! Allocate work storage
      ALLOCATE (a_lu(n, n))
      ALLOCATE (b(n, n))
      ALLOCATE (berr(n))
      ALLOCATE (ferr(n))
      ALLOCATE (ipiv(n))
      ALLOCATE (iwork(n))
      ALLOCATE (work(4*n))

      a_lu(1:n, 1:n) = a(1:n, 1:n)

      ! Compute the LU factorization of the matrix a
      CALL dgetrf(n, n, a_lu, n, ipiv, info)

      IF (info /= 0) THEN
         CPABORT("The LU factorization in dgetrf failed")
      END IF

      ! Compute the norm of the matrix a

      IF (trans == "N") THEN
         norm = '1'
      ELSE
         norm = 'I'
      END IF

      a_norm = dlange(norm, n, n, a, n, work)

      ! Compute the reciprocal of the condition number of a

      CALL dgecon(norm, n, a_lu, n, a_norm, r_cond, work, iwork, info)

      IF (info /= 0) THEN
         CPABORT("The computation of the condition number in dgecon failed")
      END IF

      IF (r_cond < EPSILON(0.0_dp)) THEN
         WRITE (message, "(A,ES10.3)") "R_COND =", r_cond
         CALL cp_abort(__LOCATION__, &
                       "Bad condition number "//TRIM(message)//" (smaller than the machine "// &
                       "working precision)")
      END IF

      ! Solve a system of linear equations using the LU factorization computed by dgetrf

      CALL unit_matrix(a_inverse)

      CALL dgetrs(trans, n, n, a_lu, n, ipiv, a_inverse, n, info)

      IF (info /= 0) THEN
         CPABORT("Solving the system of linear equations in dgetrs failed")
      END IF

      ! Improve the computed solution iteratively
      CALL unit_matrix(b) ! Initialize right-hand sides

      eval_error = 0.0_dp

      IF (do_improve) THEN
         DO iter = 1, 10

            CALL dgerfs(trans, n, n, a, n, a_lu, n, ipiv, b, n, a_inverse, n, ferr, berr, &
                        work, iwork, info)

            IF (info /= 0) THEN
               CPABORT("Improving the computed solution in dgerfs failed")
            END IF

            old_eval_error = eval_error
            eval_error = MAXVAL(ferr)

            IF (ABS(eval_error - old_eval_error) <= EPSILON(1.0_dp)) EXIT

         END DO
      END IF

      ! Release work storage
      DEALLOCATE (work)
      DEALLOCATE (iwork)
      DEALLOCATE (ipiv)
      DEALLOCATE (ferr)
      DEALLOCATE (berr)
      DEALLOCATE (b)
      DEALLOCATE (a_lu)

   END SUBROUTINE invert_matrix_d

! **************************************************************************************************
!> \brief  Compute the inverse of the n by n complex matrix a using the LAPACK
!>         library
!> \param a ...
!> \param a_inverse ...
!> \param eval_error ...
!> \param option ...
!> \date   08.06.2009
!> \par Variables
!>       - a        : Complex matrix to be inverted (input).
!>       - a_inverse: Inverse of the matrix a (output).
!>       - a_lu     : LU factorization of matrix a.
!>       - a_norm   : Norm of matrix a.
!>       - error    : Estimated error of the inversion.
!>       - r_cond   : Reciprocal condition number of the matrix a.
!>       - trans    : "N" => invert a
!>       -            "T" => invert transpose(a)
!> \author MK
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE invert_matrix_z(a, a_inverse, eval_error, option)
      COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(IN)      :: a
      COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT)     :: a_inverse
      REAL(KIND=dp), INTENT(OUT)                         :: eval_error
      CHARACTER(LEN=1), INTENT(IN), OPTIONAL             :: option

      CHARACTER(LEN=1)                                   :: norm, trans
      CHARACTER(LEN=default_string_length)               :: message
      COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:)        :: work
      COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :)     :: a_lu, b
      INTEGER                                            :: info, iter, n
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ipiv
      REAL(KIND=dp)                                      :: a_norm, old_eval_error, r_cond
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: berr, ferr, rwork
      REAL(KIND=dp), EXTERNAL                            :: zlange

      EXTERNAL zgecon, zgerfs, zgetrf, zgetrs

      ! Check for optional parameter
      IF (PRESENT(option)) THEN
         trans = option
      ELSE
         trans = "N"
      END IF

      ! Get the dimension of matrix a
      n = SIZE(a, 1)

      ! Check array dimensions
      IF (n == 0) THEN
         CPABORT("Matrix to be inverted of zero size")
      END IF

      IF (n /= SIZE(a, 2)) THEN
         CPABORT("Check the array bounds of parameter #1")
      END IF

      IF ((n /= SIZE(a_inverse, 1)) .OR. &
          (n /= SIZE(a_inverse, 2))) THEN
         CPABORT("Check the array bounds of parameter #2")
      END IF

      ! Allocate work storage
      ALLOCATE (a_lu(n, n))
      ALLOCATE (b(n, n))
      ALLOCATE (berr(n))
      ALLOCATE (ferr(n))
      ALLOCATE (ipiv(n))
      ALLOCATE (rwork(2*n))
      ALLOCATE (work(2*n))

      a_lu(1:n, 1:n) = a(1:n, 1:n)

      ! Compute the LU factorization of the matrix a
      CALL zgetrf(n, n, a_lu, n, ipiv, info)

      IF (info /= 0) THEN
         CPABORT("The LU factorization in dgetrf failed")
      END IF

      ! Compute the norm of the matrix a

      IF (trans == "N") THEN
         norm = '1'
      ELSE
         norm = 'I'
      END IF

      a_norm = zlange(norm, n, n, a, n, work)

      ! Compute the reciprocal of the condition number of a

      CALL zgecon(norm, n, a_lu, n, a_norm, r_cond, work, rwork, info)

      IF (info /= 0) THEN
         CPABORT("The computation of the condition number in dgecon failed")
      END IF

      IF (r_cond < EPSILON(0.0_dp)) THEN
         WRITE (message, "(A,ES10.3)") "R_COND =", r_cond
         CALL cp_abort(__LOCATION__, &
                       "Bad condition number "//TRIM(message)//" (smaller than the machine "// &
                       "working precision)")
      END IF

      ! Solve a system of linear equations using the LU factorization computed by dgetrf

      CALL unit_matrix(a_inverse)

      CALL zgetrs(trans, n, n, a_lu, n, ipiv, a_inverse, n, info)

      IF (info /= 0) THEN
         CPABORT("Solving the system of linear equations in dgetrs failed")
      END IF

      ! Improve the computed solution iteratively
      CALL unit_matrix(b) ! Initialize right-hand sides

      eval_error = 0.0_dp

      DO iter = 1, 10

         CALL zgerfs(trans, n, n, a, n, a_lu, n, ipiv, b, n, a_inverse, n, ferr, berr, &
                     work, rwork, info)

         IF (info /= 0) THEN
            CPABORT("Improving the computed solution in dgerfs failed")
         END IF

         old_eval_error = eval_error
         eval_error = MAXVAL(ferr)

         IF (ABS(eval_error - old_eval_error) <= EPSILON(1.0_dp)) EXIT

      END DO

      ! Release work storage
      DEALLOCATE (work)
      DEALLOCATE (rwork)
      DEALLOCATE (ipiv)
      DEALLOCATE (ferr)
      DEALLOCATE (berr)
      DEALLOCATE (b)
      DEALLOCATE (a_lu)

   END SUBROUTINE invert_matrix_z

! **************************************************************************************************
!> \brief returns the pseudoinverse of a real, square matrix using singular
!>         value decomposition
!> \param a matrix a
!> \param a_pinverse pseudoinverse of matrix a
!> \param rskip parameter for setting small singular values to zero
!> \param determinant determinant of matrix a (optional output)
!> \param sval array holding singular values of matrix a (optional output)
!> \author Dorothea Golze [02.2015]
! **************************************************************************************************
   SUBROUTINE get_pseudo_inverse_svd(a, a_pinverse, rskip, determinant, sval)
      REAL(KIND=dp), DIMENSION(:, :)                     :: a, a_pinverse
      REAL(KIND=dp), INTENT(IN)                          :: rskip
      REAL(KIND=dp), INTENT(OUT), OPTIONAL               :: determinant
      REAL(KIND=dp), DIMENSION(:), INTENT(INOUT), &
         OPTIONAL, POINTER                               :: sval

      CHARACTER(LEN=*), PARAMETER :: routineN = 'get_pseudo_inverse_svd'

      INTEGER                                            :: handle, i, info, lwork, n
      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iwork
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: sig, work
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: sig_plus, temp_mat, u, vt

      CALL timeset(routineN, handle)

      n = SIZE(a, 1)
      ALLOCATE (u(n, n), vt(n, n), sig(n), sig_plus(n, n), iwork(8*n), work(1), temp_mat(n, n))
      u(:, :) = 0.0_dp
      vt(:, :) = 0.0_dp
      sig(:) = 0.0_dp
      sig_plus = 0.0_dp
      work = 0.0_dp
      iwork = 0
      IF (PRESENT(determinant)) determinant = 1.0_dp

      ! work size query
      lwork = -1
      CALL dgesdd('A', n, n, a(1, 1), n, sig(1), u(1, 1), n, vt(1, 1), n, work(1), &
                  lwork, iwork(1), info)

      IF (info /= 0) THEN
         CPABORT("ERROR in DGESDD: Could not retrieve work array sizes")
      END IF
      lwork = INT(work(1))
      DEALLOCATE (work)
      ALLOCATE (work(lwork))

      ! do SVD
      CALL dgesdd('A', n, n, a(1, 1), n, sig(1), u(1, 1), n, vt(1, 1), n, work(1), &
                  lwork, iwork(1), info)

      IF (info /= 0) THEN
         CPABORT("SVD failed")
      END IF

      IF (PRESENT(sval)) THEN
         CPASSERT(.NOT. ASSOCIATED(sval))
         ALLOCATE (sval(n))
         sval(:) = sig
      END IF

      ! set singular values that are too small to zero
      DO i = 1, n
         IF (sig(i) > rskip*MAXVAL(sig)) THEN
            IF (PRESENT(determinant)) &
               determinant = determinant*sig(i)
            sig_plus(i, i) = 1._dp/sig(i)
         ELSE
            sig_plus(i, i) = 0.0_dp
         END IF
      END DO

      ! build pseudoinverse: V*sig_plus*UT
      CALL dgemm("N", "T", n, n, n, 1._dp, sig_plus, n, u, n, 0._dp, temp_mat, n)
      CALL dgemm("T", "N", n, n, n, 1._dp, vt, n, temp_mat, n, 0._dp, a_pinverse, n)

      DEALLOCATE (u, vt, sig, iwork, work, sig_plus, temp_mat)

      CALL timestop(handle)

   END SUBROUTINE get_pseudo_inverse_svd

! **************************************************************************************************
!> \brief returns the pseudoinverse of a real, symmetric and positive definite
!>        matrix using diagonalization.
!> \param a matrix a
!> \param a_pinverse pseudoinverse of matrix a
!> \param rskip parameter for setting small eigenvalues to zero
!> \author Dorothea Golze [02.2015]
! **************************************************************************************************
   SUBROUTINE get_pseudo_inverse_diag(a, a_pinverse, rskip)
      REAL(KIND=dp), DIMENSION(:, :)                     :: a, a_pinverse
      REAL(KIND=dp), INTENT(IN)                          :: rskip

      CHARACTER(LEN=*), PARAMETER :: routineN = 'get_pseudo_inverse_diag'

      INTEGER                                            :: handle, i, info, lwork, n
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eig, work
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: dinv, temp_mat

      CALL timeset(routineN, handle)

      info = 0
      n = SIZE(a, 1)
      ALLOCATE (dinv(n, n), eig(n), work(1), temp_mat(n, n))
      dinv(:, :) = 0.0_dp
      eig(:) = 0.0_dp
      work(:) = 0.0_dp
      temp_mat = 0.0_dp

      ! work size query
      lwork = -1
      CALL dsyev('V', 'U', n, a, n, eig(1), work(1), lwork, info)
      IF (info /= 0) THEN
         CPABORT("ERROR in DSYEV: Could not retrieve work array sizes")
      END IF
      lwork = INT(work(1))
      DEALLOCATE (work)
      ALLOCATE (work(lwork))
      work = 0.0_dp

      ! get eigenvalues and eigenvectors
      CALL dsyev('V', 'U', n, a, n, eig(1), work(1), lwork, info)

      IF (info /= 0) THEN
         CPABORT("Matrix diagonalization failed")
      END IF

      ! set eigenvalues that are too small to zero
      DO i = 1, n
         IF (eig(i) > rskip*MAXVAL(eig)) THEN
            dinv(i, i) = 1.0_dp/eig(i)
         ELSE
            dinv(i, i) = 0._dp
         END IF
      END DO

      ! build pseudoinverse: U*dinv*UT
      CALL dgemm("N", "T", n, n, n, 1._dp, dinv, n, a, n, 0._dp, temp_mat, n)
      CALL dgemm("N", "N", n, n, n, 1._dp, a, n, temp_mat, n, 0._dp, a_pinverse, n)

      DEALLOCATE (eig, work, dinv, temp_mat)

      CALL timestop(handle)

   END SUBROUTINE get_pseudo_inverse_diag

! **************************************************************************************************
!> \brief  Reflection of the vector a through a mirror plane defined by the
!>         normal vector b. The reflected vector a is stored in a_mirror.
!> \param a ...
!> \param b ...
!> \return ...
!> \date    16.10.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION reflect_vector(a, b) RESULT(a_mirror)
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: a, b
      REAL(KIND=dp), DIMENSION(3)                        :: a_mirror

      REAL(KIND=dp)                                      :: length_of_b, scapro
      REAL(KIND=dp), DIMENSION(3)                        :: d

      length_of_b = SQRT(b(1)*b(1) + b(2)*b(2) + b(3)*b(3))

      IF (length_of_b > eps_geo) THEN

         d(:) = b(:)/length_of_b

         ! Calculate the mirror image a_mirror of the vector a
         scapro = a(1)*d(1) + a(2)*d(2) + a(3)*d(3)

         a_mirror(:) = a(:) - 2.0_dp*scapro*d(:)

      ELSE

         a_mirror(:) = 0.0_dp

      END IF

   END FUNCTION reflect_vector

! **************************************************************************************************
!> \brief   Rotation of the vector a about an rotation axis defined by the
!>          vector b. The rotation angle is phi (radians). The rotated vector
!>          a is stored in a_rot.
!> \param a ...
!> \param phi ...
!> \param b ...
!> \return ...
!> \date    16.10.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION rotate_vector(a, phi, b) RESULT(a_rot)
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: a
      REAL(KIND=dp), INTENT(IN)                          :: phi
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: b
      REAL(KIND=dp), DIMENSION(3)                        :: a_rot

      REAL(KIND=dp)                                      :: length_of_b
      REAL(KIND=dp), DIMENSION(3, 3)                     :: rotmat

      length_of_b = SQRT(b(1)*b(1) + b(2)*b(2) + b(3)*b(3))
      IF (length_of_b > eps_geo) THEN

         ! Build up the rotation matrix rotmat
         CALL build_rotmat(phi, b, rotmat)

         ! Rotate the vector a by phi about the axis defined by vector b
         a_rot(:) = MATMUL(rotmat, a)

      ELSE

         a_rot(:) = 0.0_dp

      END IF

   END FUNCTION rotate_vector

! **************************************************************************************************
!> \brief   Set the diagonal elements of matrix a to b.
!> \param a ...
!> \param b ...
!> \date    20.11.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE SUBROUTINE set_diag_scalar_d(a, b)
      REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: a
      REAL(KIND=dp), INTENT(IN)                          :: b

      INTEGER                                            :: i, n

      n = MIN(SIZE(a, 1), SIZE(a, 2))
      DO i = 1, n
         a(i, i) = b
      END DO

   END SUBROUTINE set_diag_scalar_d

! **************************************************************************************************
!> \brief ...
!> \param a ...
!> \param b ...
! **************************************************************************************************
   PURE SUBROUTINE set_diag_scalar_z(a, b)
      COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT)   :: a
      COMPLEX(KIND=dp), INTENT(IN)                       :: b

      INTEGER                                            :: i, n

      n = MIN(SIZE(a, 1), SIZE(a, 2))
      DO i = 1, n
         a(i, i) = b
      END DO

   END SUBROUTINE set_diag_scalar_z

! **************************************************************************************************
!> \brief   Symmetrize the matrix a.
!> \param a ...
!> \param option ...
!> \date    16.10.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE symmetrize_matrix(a, option)
      REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: a
      CHARACTER(LEN=*), INTENT(IN)                       :: option

      INTEGER                                            :: i, n

      n = MIN(SIZE(a, 1), SIZE(a, 2))

      IF (option == "lower_to_upper") THEN
         DO i = 1, n - 1
            a(i, i + 1:n) = a(i + 1:n, i)
         END DO
      ELSE IF (option == "upper_to_lower") THEN
         DO i = 1, n - 1
            a(i + 1:n, i) = a(i, i + 1:n)
         END DO
      ELSE IF (option == "anti_lower_to_upper") THEN
         DO i = 1, n - 1
            a(i, i + 1:n) = -a(i + 1:n, i)
         END DO
      ELSE IF (option == "anti_upper_to_lower") THEN
         DO i = 1, n - 1
            a(i + 1:n, i) = -a(i, i + 1:n)
         END DO
      ELSE
         CPABORT("Invalid option <"//TRIM(option)//"> was specified for parameter #2")
      END IF

   END SUBROUTINE symmetrize_matrix

! **************************************************************************************************
!> \brief   Set the matrix a to be a unit matrix.
!> \param a ...
!> \date    16.10.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE SUBROUTINE unit_matrix_d(a)
      REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: a

      a(:, :) = 0.0_dp
      CALL set_diag(a, 1.0_dp)

   END SUBROUTINE unit_matrix_d

! **************************************************************************************************
!> \brief ...
!> \param a ...
! **************************************************************************************************
   PURE SUBROUTINE unit_matrix_z(a)
      COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(INOUT)   :: a

      a(:, :) = (0.0_dp, 0.0_dp)
      CALL set_diag(a, (1.0_dp, 0.0_dp))

   END SUBROUTINE unit_matrix_z

! **************************************************************************************************
!> \brief   Calculation of the vector product c = a x b.
!> \param a ...
!> \param b ...
!> \return ...
!> \date    16.10.1998
!> \author  MK
!> \version 1.0
! **************************************************************************************************
   PURE FUNCTION vector_product(a, b) RESULT(c)
      REAL(KIND=dp), DIMENSION(3), INTENT(IN)            :: a, b
      REAL(KIND=dp), DIMENSION(3)                        :: c

      c(1) = a(2)*b(3) - a(3)*b(2)
      c(2) = a(3)*b(1) - a(1)*b(3)
      c(3) = a(1)*b(2) - a(2)*b(1)

   END FUNCTION vector_product

! **************************************************************************************************
!> \brief computes the greatest common divisor of two number
!> \param a ...
!> \param b ...
!> \return ...
!> \author Joost VandeVondele
! **************************************************************************************************
   ELEMENTAL FUNCTION gcd(a, b)
      INTEGER, INTENT(IN)                                :: a, b
      INTEGER                                            :: gcd

      INTEGER                                            :: aa, ab, l, rem, s

      aa = ABS(a)
      ab = ABS(b)
      IF (aa < ab) THEN
         s = aa
         l = ab
      ELSE
         s = ab
         l = aa
      END IF
      IF (s /= 0) THEN
         DO
            rem = MOD(l, s)
            IF (rem == 0) EXIT
            l = s
            s = rem
         END DO
         GCD = s
      ELSE
         GCD = l
      END IF
   END FUNCTION gcd

! **************************************************************************************************
!> \brief computes the least common multiplier of two numbers
!> \param a ...
!> \param b ...
!> \return ...
!> \author Joost VandeVondele
! **************************************************************************************************
   ELEMENTAL FUNCTION lcm(a, b)
      INTEGER, INTENT(IN)                                :: a, b
      INTEGER                                            :: lcm

      INTEGER                                            :: tmp

      tmp = gcd(a, b)
      IF (tmp == 0) THEN
         lcm = 0
      ELSE
         ! could still overflow if the true lcm is larger than maxint
         lcm = ABS((a/tmp)*b)
      END IF
   END FUNCTION lcm

! **************************************************************************************************
!> \brief computes the exponential integral
!>      Ei(x) = Int(exp(-x*t)/t,t=1..infinity)  x>0
!> \param x ...
!> \return ...
!> \author JGH (adapted from Numerical recipies)
! **************************************************************************************************
   FUNCTION ei(x)
      REAL(dp)                                           :: x, ei

      INTEGER, PARAMETER                                 :: maxit = 100
      REAL(dp), PARAMETER                                :: eps = EPSILON(0.0_dp), &
                                                            fpmin = TINY(0.0_dp)

      INTEGER                                            :: k
      REAL(dp)                                           :: fact, prev, sum1, term

      IF (x <= 0._dp) THEN
         CPABORT("Invalid argument")
      END IF

      IF (x < fpmin) THEN
         ei = LOG(x) + euler
      ELSE IF (x <= -LOG(EPS)) THEN
         sum1 = 0._dp
         fact = 1._dp
         DO k = 1, maxit
            fact = fact*x/REAL(k, dp)
            term = fact/REAL(k, dp)
            sum1 = sum1 + term
            IF (term < eps*sum1) EXIT
         END DO
         ei = sum1 + LOG(x) + euler
      ELSE
         sum1 = 0._dp
         term = 1._dp
         DO k = 1, maxit
            prev = term
            term = term*REAL(k, dp)/x
            IF (term < eps) EXIT
            IF (term < prev) THEN
               sum1 = sum1 + term
            ELSE
               sum1 = sum1 - prev
               EXIT
            END IF
         END DO
         ei = EXP(x)*(1._dp + sum1)/x
      END IF

   END FUNCTION ei

! **************************************************************************************************
!> \brief computes the exponential integral
!>      En(x) = Int(exp(-x*t)/t^n,t=1..infinity)  x>0, n=0,1,..
!>      Note: Ei(-x) = -E1(x)
!> \param n ...
!> \param x ...
!> \return ...
!> \par History
!>      05.2007 Created
!> \author Manuel Guidon (adapted from Numerical recipies)
! **************************************************************************************************
   ELEMENTAL IMPURE FUNCTION expint(n, x)
      INTEGER, INTENT(IN)                                :: n
      REAL(dp), INTENT(IN)                               :: x
      REAL(dp)                                           :: expint

      INTEGER, PARAMETER                                 :: maxit = 100
      REAL(dp), PARAMETER :: eps = 6.e-14_dp, euler = 0.5772156649015328606065120_dp, &
         fpmin = TINY(0.0_dp)

      INTEGER                                            :: i, ii, nm1
      REAL(dp)                                           :: a, b, c, d, del, fact, h, psi

      nm1 = n - 1

      IF (n < 0 .OR. x < 0.0_dp .OR. (x == 0.0_dp .AND. (n == 0 .OR. n == 1))) THEN
         CPABORT("Invalid argument")
      ELSE IF (n == 0) THEN !Special case.
         expint = EXP(-x)/x
      ELSE IF (x == 0.0_dp) THEN !Another special case.
         expint = 1.0_dp/nm1
      ELSE IF (x > 1.0_dp) THEN !Lentz's algorithm (5.2).
         b = x + n
         c = 1.0_dp/FPMIN
         d = 1.0_dp/b
         h = d
         DO i = 1, MAXIT
            a = -i*(nm1 + i)
            b = b + 2.0_dp
            d = 1.0_dp/(a*d + b)
            c = b + a/c
            del = c*d
            h = h*del
            IF (ABS(del - 1.0_dp) < EPS) THEN
               expint = h*EXP(-x)
               RETURN
            END IF
         END DO
         CPABORT("continued fraction failed in expint")
      ELSE !Evaluate series.
         IF (nm1 /= 0) THEN !Set first term.
            expint = 1.0_dp/nm1
         ELSE
            expint = -LOG(x) - euler
         END IF
         fact = 1.0_dp
         DO i = 1, MAXIT
            fact = -fact*x/i
            IF (i /= nm1) THEN
               del = -fact/(i - nm1)
            ELSE
               psi = -euler !Compute I(n).
               DO ii = 1, nm1
                  psi = psi + 1.0_dp/ii
               END DO
               del = fact*(-LOG(x) + psi)
            END IF
            expint = expint + del
            IF (ABS(del) < ABS(expint)*EPS) RETURN
         END DO
         CPABORT("series failed in expint")
      END IF

   END FUNCTION expint

! **************************************************************************************************
!> \brief  Jacobi matrix diagonalization. The eigenvalues are returned in
!>         vector d and the eigenvectors are returned in matrix v in ascending
!>         order.
!>
!> \param a ...
!> \param d ...
!> \param v ...
!> \par History
!>         - Creation (20.11.98, Matthias Krack)
! **************************************************************************************************
   SUBROUTINE jacobi(a, d, v)
      REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: a
      REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: d
      REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: v

      INTEGER                                            :: n

      n = SIZE(d(:))

      ! Diagonalize matrix a
      CALL diag(n, a, d, v)

      ! Sort eigenvalues and eigenvector in ascending order
      CALL eigsrt(n, d, v)

   END SUBROUTINE jacobi

! **************************************************************************************************
!> \brief  Diagonalize matrix a. The eigenvalues are returned in vector d
!>         and the eigenvectors are returned in matrix v.
!>
!> \param n matrix/vector extent (problem size)
!> \param a matrix to be diagonalised
!> \param d vector of eigenvalues
!> \param v matrix of eigenvectors
!> \par History
!>         - Creation (20.11.98, Matthias Krack)
! **************************************************************************************************
   SUBROUTINE diag(n, a, d, v)
      INTEGER, INTENT(IN)                                :: n
      REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: a
      REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: d
      REAL(KIND=dp), DIMENSION(:, :), INTENT(OUT)        :: v

      CHARACTER(len=*), PARAMETER                        :: routineN = 'diag'
      REAL(KIND=dp), PARAMETER                           :: a_eps = 1.0E-10_dp, d_eps = 1.0E-3_dp

      INTEGER                                            :: handle, i, ip, iq
      REAL(KIND=dp)                                      :: a_max, apq, c, d_min, dip, diq, g, h, s, &
                                                            t, tau, theta, tresh
      REAL(KIND=dp), DIMENSION(n)                        :: b, z

      CALL timeset(routineN, handle)

      a_max = 0.0_dp
      DO ip = 1, n - 1
         a_max = MAX(a_max, MAXVAL(ABS(a(ip, ip + 1:n))))
         b(ip) = a(ip, ip) ! get_diag(a)
      END DO
      b(n) = a(n, n)

      CALL unit_matrix(v)

      ! Go for 50 iterations
      DO i = 1, 50
         d = b
         d_min = MAX(d_eps, MINVAL(ABS(b)))
         IF (a_max < a_eps*d_min) THEN
            CALL timestop(handle)
            RETURN
         END IF
         tresh = MERGE(a_max, 0.0_dp, (i < 4))
         z = 0.0_dp
         DO ip = 1, n - 1
            DO iq = ip + 1, n
               dip = d(ip)
               diq = d(iq)
               apq = a(ip, iq)
               g = 100.0_dp*ABS(apq)
               IF (tresh < ABS(apq)) THEN
                  h = diq - dip
                  IF ((ABS(h) + g) /= ABS(h)) THEN
                     theta = 0.5_dp*h/apq
                     t = 1.0_dp/(ABS(theta) + SQRT(1.0_dp + theta**2))
                     IF (theta < 0.0_dp) t = -t
                  ELSE
                     t = apq/h
                  END IF
                  c = 1.0_dp/SQRT(1.0_dp + t**2)
                  s = t*c
                  tau = s/(1.0_dp + c)
                  h = t*apq
                  z(ip) = z(ip) - h
                  z(iq) = z(iq) + h
                  d(ip) = dip - h
                  d(iq) = diq + h
                  a(ip, iq) = 0.0_dp
                  CALL jrotate(a(1:ip - 1, ip), a(1:ip - 1, iq), s, tau)
                  CALL jrotate(a(ip, ip + 1:iq - 1), a(ip + 1:iq - 1, iq), s, tau)
                  CALL jrotate(a(ip, iq + 1:n), a(iq, iq + 1:n), s, tau)
                  CALL jrotate(v(:, ip), v(:, iq), s, tau)
               ELSE IF ((4 < i) .AND. &
                        ((ABS(dip) + g) == ABS(dip)) .AND. &
                        ((ABS(diq) + g) == ABS(diq))) THEN
                  a(ip, iq) = 0.0_dp
               END IF
            END DO
         END DO
         b = b + z
         a_max = 0.0_dp
         DO ip = 1, n - 1
            a_max = MAX(a_max, MAXVAL(ABS(a(ip, ip + 1:n))))
         END DO
      END DO
      WRITE (*, '(/,T2,A,/)') 'Too many iterations in jacobi'

      CALL timestop(handle)

   END SUBROUTINE diag

! **************************************************************************************************
!> \brief  Perform a Jacobi rotation of the vectors a and b.
!>
!> \param a ...
!> \param b ...
!> \param ss ...
!> \param tt ...
!> \par History
!>         - Creation (20.11.98, Matthias Krack)
! **************************************************************************************************
   PURE SUBROUTINE jrotate(a, b, ss, tt)
      REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)         :: a, b
      REAL(KIND=dp), INTENT(IN)                          :: ss, tt

      REAL(KIND=dp)                                      :: u, v

      u = 1.0_dp - ss*tt
      v = ss/u

      a = a*u - b*ss
      b = b*(u + ss*v) + a*v

   END SUBROUTINE jrotate

! **************************************************************************************************
!> \brief Sort the values in vector d in ascending order and swap the
!>        corresponding columns of matrix v.
!>
!> \param n ...
!> \param d ...
!> \param v ...
!> \par History
!>         - Creation (20.11.98, Matthias Krack)
! **************************************************************************************************
   SUBROUTINE eigsrt(n, d, v)
      INTEGER, INTENT(IN)                                :: n
      REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)         :: d
      REAL(KIND=dp), DIMENSION(:, :), INTENT(INOUT)      :: v

      INTEGER                                            :: i, j

      DO i = 1, n - 1
         j = SUM(MINLOC(d(i:n))) + i - 1
         IF (j /= i) THEN
            CALL swap(d(i), d(j))
            CALL swap(v(:, i), v(:, j))
         END IF
      END DO

   END SUBROUTINE eigsrt

! **************************************************************************
!> \brief Swap two scalars
!>
!> \param a ...
!> \param b ...
!> \par History
!>         - Creation (20.11.98, Matthias Krack)
! **************************************************************************************************
   ELEMENTAL SUBROUTINE swap_scalar(a, b)
      REAL(KIND=dp), INTENT(INOUT)                       :: a, b

      REAL(KIND=dp)                                      :: c

      c = a
      a = b
      b = c

   END SUBROUTINE swap_scalar

! **************************************************************************
!> \brief Swap two vectors
!>
!> \param a ...
!> \param b ...
!> \par History
!>         - Creation (20.11.98, Matthias Krack)
! **************************************************************************************************
   SUBROUTINE swap_vector(a, b)
      REAL(KIND=dp), DIMENSION(:), INTENT(INOUT)         :: a, b

      INTEGER                                            :: i, n
      REAL(KIND=dp)                                      :: c

      n = SIZE(a)

      IF (n /= SIZE(b)) THEN
         CPABORT("Check the array bounds of the parameters")
      END IF

      DO i = 1, n
         c = a(i)
         a(i) = b(i)
         b(i) = c
      END DO

   END SUBROUTINE swap_vector

! **************************************************************************************************
!> \brief - compute a truncation radius for the shortrange operator
!> \param eps target accuracy!> \param omg screening parameter
!> \param omg ...
!> \param r_cutoff cutoff radius
!> \par History
!>      10.2012 created [Hossein Banihashemian]
!>      05.2019 moved here from hfx_types (A. Bussy)
!> \author Hossein Banihashemian
! **************************************************************************************************
   SUBROUTINE erfc_cutoff(eps, omg, r_cutoff)
      IMPLICIT NONE

      REAL(dp), INTENT(in)  :: eps, omg
      REAL(dp), INTENT(out) :: r_cutoff

      CHARACTER(LEN=*), PARAMETER :: routineN = 'erfc_cutoff'

      REAL(dp), PARAMETER :: abstol = 1E-10_dp, soltol = 1E-16_dp
      REAL(dp) :: r0, f0, fprime0, delta_r
      INTEGER :: iter, handle
      INTEGER, PARAMETER :: iterMAX = 1000

      CALL timeset(routineN, handle)

      ! initial guess assuming that we are in the asymptotic regime of the erf, and the solution is about 10.
      r0 = SQRT(-LOG(eps*omg*10**2))/omg
      CALL eval_transc_func(r0, eps, omg, f0, fprime0)

      DO iter = 1, iterMAX
         delta_r = f0/fprime0
         r0 = r0 - delta_r
         CALL eval_transc_func(r0, eps, omg, f0, fprime0)
         IF (ABS(delta_r) < abstol .OR. ABS(f0) < soltol) EXIT
      END DO
      CPASSERT(iter <= itermax)
      r_cutoff = r0

      CALL timestop(handle)
   CONTAINS
! **************************************************************************************************
!> \brief ...
!> \param r ...
!> \param eps ...
!> \param omega ...
!> \param fn ...
!> \param df ...
! **************************************************************************************************
      ELEMENTAL SUBROUTINE eval_transc_func(r, eps, omega, fn, df)
      REAL(dp), INTENT(in)                               :: r, eps, omega
      REAL(dp), INTENT(out)                              :: fn, df

      REAL(dp)                                           :: qr

         qr = omega*r
         fn = ERFC(qr) - r*eps
         df = -2.0_dp*oorootpi*omega*EXP(-qr**2) - eps
      END SUBROUTINE eval_transc_func
   END SUBROUTINE erfc_cutoff

! **************************************************************************************************
!> \brief Diagonalizes a local complex Hermitian matrix using LAPACK. Based on cp_cfm_heevd
!> \param matrix Hermitian matrix is preserved
!> \param eigenvectors ...
!> \param eigenvalues ...
!> \author A. Bussy
! **************************************************************************************************
   SUBROUTINE diag_complex(matrix, eigenvectors, eigenvalues)
      COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(IN)      :: matrix
      COMPLEX(KIND=dp), DIMENSION(:, :), INTENT(OUT)     :: eigenvectors
      REAL(KIND=dp), DIMENSION(:), INTENT(OUT)           :: eigenvalues

      CHARACTER(len=*), PARAMETER                        :: routineN = 'diag_complex'

      COMPLEX(KIND=dp), DIMENSION(:), ALLOCATABLE        :: work
      INTEGER                                            :: handle, info, liwork, lrwork, lwork, n
      INTEGER, DIMENSION(:), ALLOCATABLE                 :: iwork
      REAL(KIND=dp), DIMENSION(:), ALLOCATABLE           :: rwork

      CALL timeset(routineN, handle)

      IF (SIZE(matrix, 1) /= SIZE(matrix, 2)) CPABORT("Expected square matrix")
      ! IF (MAXVAL(ABS(matrix - CONJG(TRANSPOSE(matrix)))) > 1e-14_dp) CPABORT("Expected hermitian matrix")

      n = SIZE(matrix, 1)
      ALLOCATE (iwork(1), rwork(1), work(1))

      ! work space query
      lwork = -1
      lrwork = -1
      liwork = -1

      CALL zheevd('V', 'U', n, eigenvectors, n, eigenvalues, work, lwork, rwork, lrwork, iwork, liwork, info)

      lwork = CEILING(REAL(work(1), KIND=dp))
      lrwork = CEILING(rwork(1))
      liwork = iwork(1)

      DEALLOCATE (iwork, rwork, work)
      ALLOCATE (iwork(liwork), rwork(lrwork), work(lwork))
      eigenvectors(:, :) = matrix(:, :)

      ! final diagonalization
      CALL zheevd('V', 'U', n, eigenvectors, n, eigenvalues, work, lwork, rwork, lrwork, iwork, liwork, info)

      DEALLOCATE (iwork, rwork, work)

      IF (info /= 0) CPABORT("Diagonalisation of a complex matrix failed")

      CALL timestop(handle)

   END SUBROUTINE diag_complex

! **************************************************************************************************
!> \brief Helper routine for diagonalizing anti symmetric matrices
!> \param matrix ...
!> \param evecs ...
!> \param evals ...
! **************************************************************************************************
   SUBROUTINE diag_antisym(matrix, evecs, evals)
      REAL(dp), DIMENSION(:, :)                          :: matrix
      COMPLEX(dp), DIMENSION(:, :)                       :: evecs
      COMPLEX(dp), DIMENSION(:)                          :: evals

      COMPLEX(dp), ALLOCATABLE, DIMENSION(:, :)          :: matrix_c
      INTEGER                                            :: n
      REAL(dp), ALLOCATABLE, DIMENSION(:)                :: eigenvalues

      IF (SIZE(matrix, 1) /= SIZE(matrix, 2)) CPABORT("Expected square matrix")
      ! IF (MAXVAL(ABS(matrix + TRANSPOSE(matrix))) > 1e-14_dp) CPABORT("Expected anti-symmetric matrix")

      n = SIZE(matrix, 1)
      ALLOCATE (matrix_c(n, n), eigenvalues(n))

      matrix_c(:, :) = CMPLX(0.0_dp, -matrix, kind=dp)
      CALL diag_complex(matrix_c, evecs, eigenvalues)
      evals = CMPLX(0.0_dp, eigenvalues, kind=dp)

      DEALLOCATE (matrix_c, eigenvalues)
   END SUBROUTINE diag_antisym
! **************************************************************************************************
!> \brief Square array multiplication via LAPACK routines, leaves inputs unchanged
!> \param A_in Input matrix 1
!> \param A_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 1
!> \param B_in Input matrix 2
!> \param B_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 2
!> \param C_out Output matrix
!> \par History
!>    11.2025 created [Stepan Marek]
! **************************************************************************************************
   SUBROUTINE zgemm_square_2(A_in, A_trans, B_in, B_trans, C_out)
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(IN)      :: A_in
      CHARACTER, INTENT(IN)                              :: A_trans
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(IN)      :: B_in
      CHARACTER, INTENT(IN)                              :: B_trans
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(INOUT)   :: C_out

      CHARACTER(len=*), PARAMETER                        :: routineN = 'zgemm_square_2'

      INTEGER                                            :: handle, n

      n = SIZE(A_in, 1)
      IF (n /= SIZE(A_in, 2)) CPABORT("Non-square array 1 (A).")
      IF (n /= SIZE(B_in, 1)) CPABORT("Incompatible (rows) array 2 (B).")
      IF (n /= SIZE(B_in, 2)) CPABORT("Non-square array 2 (B).")
      IF (n /= SIZE(C_out, 1)) CPABORT("Incompatible (rows) result array 3 (C).")
      IF (n /= SIZE(C_out, 2)) CPABORT("Incompatible (cols) result array 3 (C).")
      IF (.NOT. (A_trans == 'N' .OR. A_trans == 'n' .OR. &
                 A_trans == 'T' .OR. A_trans == 't' .OR. &
                 A_trans == 'C' .OR. A_trans == 'c')) &
         CPABORT("Unknown transpose character for array 1 (A).")
      IF (.NOT. (B_trans == 'N' .OR. B_trans == 'n' .OR. &
                 B_trans == 'T' .OR. B_trans == 't' .OR. &
                 B_trans == 'C' .OR. B_trans == 'c')) &
         CPABORT("Unknown transpose character for array 2 (B).")

      CALL timeset(routineN, handle)

      CALL ZGEMM(A_trans, B_trans, n, n, n, z_one, A_in, n, B_in, n, z_zero, C_out, n)

      CALL timestop(handle)

   END SUBROUTINE zgemm_square_2
! **************************************************************************************************
!> \brief Square array multiplication via LAPACK routines, leaves inputs unchanged, real matrices
!> \param A_in Input matrix 1
!> \param A_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 1
!> \param B_in Input matrix 2
!> \param B_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 2
!> \param C_out Output matrix
!> \par History
!>    11.2025 created [Stepan Marek]
! **************************************************************************************************
   SUBROUTINE dgemm_square_2(A_in, A_trans, B_in, B_trans, C_out)
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN)         :: A_in
      CHARACTER, INTENT(IN)                              :: A_trans
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN)         :: B_in
      CHARACTER, INTENT(IN)                              :: B_trans
      REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT)      :: C_out

      CHARACTER(len=*), PARAMETER                        :: routineN = 'dgemm_square_2'

      INTEGER                                            :: handle, n

      n = SIZE(A_in, 1)
      IF (n /= SIZE(A_in, 2)) CPABORT("Non-square array 1 (A).")
      IF (n /= SIZE(B_in, 1)) CPABORT("Incompatible (rows) array 2 (B).")
      IF (n /= SIZE(B_in, 2)) CPABORT("Non-square array 2 (B).")
      IF (n /= SIZE(C_out, 1)) CPABORT("Incompatible (rows) result array 3 (C).")
      IF (n /= SIZE(C_out, 2)) CPABORT("Incompatible (cols) result array 3 (C).")
      IF (.NOT. (A_trans == 'N' .OR. A_trans == 'n' .OR. &
                 A_trans == 'T' .OR. A_trans == 't' .OR. &
                 A_trans == 'C' .OR. A_trans == 'c')) &
         CPABORT("Unknown transpose character for array 1 (A).")
      IF (.NOT. (B_trans == 'N' .OR. B_trans == 'n' .OR. &
                 B_trans == 'T' .OR. B_trans == 't' .OR. &
                 B_trans == 'C' .OR. B_trans == 'c')) &
         CPABORT("Unknown transpose character for array 2 (B).")

      CALL timeset(routineN, handle)

      CALL DGEMM(A_trans, B_trans, n, n, n, 1.0_dp, A_in, n, B_in, n, 0.0_dp, C_out, n)

      CALL timestop(handle)

   END SUBROUTINE dgemm_square_2
! **************************************************************************************************
!> \brief Square array multiplication via LAPACK routines, leaves inputs unchanged, for 3 matrices
!> \param A_in Input matrix 1
!> \param A_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 1
!> \param B_in Input matrix 2
!> \param B_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 2
!> \param C_in Input matrix 3
!> \param C_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 3
!> \param D_out Output matrix
!> \par History
!>    11.2025 created [Stepan Marek]
! **************************************************************************************************
   SUBROUTINE zgemm_square_3(A_in, A_trans, B_in, B_trans, C_in, C_trans, D_out)
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(IN)      :: A_in
      CHARACTER, INTENT(IN)                              :: A_trans
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(IN)      :: B_in
      CHARACTER, INTENT(IN)                              :: B_trans
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(IN)      :: C_in
      CHARACTER, INTENT(IN)                              :: C_trans
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(INOUT)   :: D_out

      CHARACTER(len=*), PARAMETER                        :: routineN = 'zgemm_square_3'

      COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:, :)     :: work
      INTEGER                                            :: handle, n

      n = SIZE(A_in, 1)
      IF (n /= SIZE(A_in, 2)) CPABORT("Non-square array 1 (A).")
      IF (n /= SIZE(B_in, 1)) CPABORT("Incompatible (rows) array 2 (B).")
      IF (n /= SIZE(B_in, 2)) CPABORT("Non-square array 2 (B).")
      IF (n /= SIZE(C_in, 1)) CPABORT("Incompatible (rows) array 3 (C).")
      IF (n /= SIZE(C_in, 2)) CPABORT("Non-square array 3 (C).")
      IF (n /= SIZE(D_out, 1)) CPABORT("Incompatible (rows) result array 4 (D).")
      IF (n /= SIZE(D_out, 2)) CPABORT("Incompatible (cols) result array 4 (D).")
      IF (.NOT. (A_trans == 'N' .OR. A_trans == 'n' .OR. &
                 A_trans == 'T' .OR. A_trans == 't' .OR. &
                 A_trans == 'C' .OR. A_trans == 'c')) &
         CPABORT("Unknown transpose character for array 1 (A).")
      IF (.NOT. (B_trans == 'N' .OR. B_trans == 'n' .OR. &
                 B_trans == 'T' .OR. B_trans == 't' .OR. &
                 B_trans == 'C' .OR. B_trans == 'c')) &
         CPABORT("Unknown transpose character for array 2 (B).")
      IF (.NOT. (C_trans == 'N' .OR. C_trans == 'n' .OR. &
                 C_trans == 'T' .OR. C_trans == 't' .OR. &
                 C_trans == 'C' .OR. C_trans == 'c')) &
         CPABORT("Unknown transpose character for array 3 (C).")

      CALL timeset(routineN, handle)

      ALLOCATE (work(n, n), source=z_zero)

      CALL ZGEMM(A_trans, B_trans, n, n, n, z_one, A_in, n, B_in, n, z_zero, work, n)
      CALL ZGEMM('N', C_trans, n, n, n, z_one, work, n, C_in, n, z_zero, D_out, n)

      DEALLOCATE (work)

      CALL timestop(handle)

   END SUBROUTINE zgemm_square_3
! **************************************************************************************************
!> \brief Square array multiplication via LAPACK routines, leaves inputs unchanged, for 3 matrices
!> \param A_in Input matrix 1
!> \param A_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 1
!> \param B_in Input matrix 2
!> \param B_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 2
!> \param C_in Input matrix 3
!> \param C_trans 'N' - no transpose, 'T' - transpose, 'C' - hermitian conj. of matrix 3
!> \param D_out Output matrix
!> \par History
!>    11.2025 created [Stepan Marek]
! **************************************************************************************************
   SUBROUTINE dgemm_square_3(A_in, A_trans, B_in, B_trans, C_in, C_trans, D_out)
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN)         :: A_in
      CHARACTER, INTENT(IN)                              :: A_trans
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN)         :: B_in
      CHARACTER, INTENT(IN)                              :: B_trans
      REAL(kind=dp), DIMENSION(:, :), INTENT(IN)         :: C_in
      CHARACTER, INTENT(IN)                              :: C_trans
      REAL(kind=dp), DIMENSION(:, :), INTENT(INOUT)      :: D_out

      CHARACTER(len=*), PARAMETER                        :: routineN = 'dgemm_square_3'

      INTEGER                                            :: handle, n
      REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :)        :: work

      n = SIZE(A_in, 1)
      IF (n /= SIZE(A_in, 2)) CPABORT("Non-square array 1 (A).")
      IF (n /= SIZE(B_in, 1)) CPABORT("Incompatible (rows) array 2 (B).")
      IF (n /= SIZE(B_in, 2)) CPABORT("Non-square array 2 (B).")
      IF (n /= SIZE(C_in, 1)) CPABORT("Incompatible (rows) array 3 (C).")
      IF (n /= SIZE(C_in, 2)) CPABORT("Non-square array 3 (C).")
      IF (n /= SIZE(D_out, 1)) CPABORT("Incompatible (rows) result array 4 (D).")
      IF (n /= SIZE(D_out, 2)) CPABORT("Incompatible (cols) result array 4 (D).")
      IF (.NOT. (A_trans == 'N' .OR. A_trans == 'n' .OR. &
                 A_trans == 'T' .OR. A_trans == 't' .OR. &
                 A_trans == 'C' .OR. A_trans == 'c')) &
         CPABORT("Unknown transpose character for array 1 (A).")
      IF (.NOT. (B_trans == 'N' .OR. B_trans == 'n' .OR. &
                 B_trans == 'T' .OR. B_trans == 't' .OR. &
                 B_trans == 'C' .OR. B_trans == 'c')) &
         CPABORT("Unknown transpose character for array 2 (B).")
      IF (.NOT. (C_trans == 'N' .OR. C_trans == 'n' .OR. &
                 C_trans == 'T' .OR. C_trans == 't' .OR. &
                 C_trans == 'C' .OR. C_trans == 'c')) &
         CPABORT("Unknown transpose character for array 3 (C).")

      CALL timeset(routineN, handle)

      ALLOCATE (work(n, n), source=0.0_dp)

      CALL DGEMM(A_trans, B_trans, n, n, n, 1.0_dp, A_in, n, B_in, n, 0.0_dp, work, n)
      CALL DGEMM('N', C_trans, n, n, n, 1.0_dp, work, n, C_in, n, 0.0_dp, D_out, n)

      DEALLOCATE (work)

      CALL timestop(handle)

   END SUBROUTINE dgemm_square_3

END MODULE mathlib
